This script is set to map the annotated SC dataset on to the spatial transcriptomics Visium slide. In this script we use the Visium data coming from 03-Clustering/03-clustering and the SC data comming from /scratch/devel/rmassoni/tonsil_atlas/current/scRNA-seq/results/R_objects/final_clusters/tonsil_atlas_all_cells_20210930.rds.
library(Seurat)
library(ggpubr)
library(cowplot)
library(dplyr)
library(ggplot2)
library(stringr)
library(readr)
library(Matrix)
library(SPOTlight)
Loading necessary paths and parameters
set.seed(123)
source(here::here("misc/paths.R"))
source(here::here("utils/bin.R"))
"{fig1}/{plt_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = .,
showWarnings = FALSE,
recursive = TRUE)
"{fig1}/{robj_dir}" %>%
glue::glue() %>%
here::here() %>%
dir.create(path = .,
showWarnings = FALSE,
recursive = TRUE)
clust_vr <- "annotation_figure_1"
We have 8 different datasets that we have integrated in previous scripts - 03-clustering/03-clustering_integration.Rmds. We are going to analyze the integrated dataset all together since the regions are shared across all
# Load Tonsils integrated
sp_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_spatial_seurat_obj.rds" %>%
glue::glue() %>%
here::here() %>%
readRDS(file = .)
# Load single-cell dataset
sc_obj <- "misc/{robj_dir}/20220215_tonsil_atlas_rna_seurat_obj.rds" %>%
glue::glue() %>%
here::here() %>%
readRDS(file = .)
Look at cell types
flextable::flextable(data.frame(table(sc_obj@meta.data[, clust_vr])))
Var1 | Freq |
Activated NBC | 13,702 |
CD4 T | 38,929 |
CD8 T | 4,835 |
cycling FDC | 223 |
cycling myeloid | 97 |
cycling T | 1,860 |
DC | 992 |
DN | 694 |
epithelial | 361 |
FDC | 980 |
GCBC | 72,724 |
Granulocytes | 157 |
ILC | 327 |
Mast | 40 |
MBC | 45,395 |
Mono/Macro | 1,353 |
Naive CD4 T | 13,378 |
Naive CD8 T | 4,431 |
NBC | 52,831 |
NK | 338 |
PC | 9,088 |
PDC | 482 |
preB/T | 82 |
sc_obj@meta.data <- sc_obj@meta.data %>%
dplyr::mutate(
annotation_figure_1 = dplyr::case_when(
# Join all B cells
annotation_figure_1 %in% c("cycling FDC", "Cycling", "cycling T", "cycling myeloid") ~ "Cycling",
TRUE ~ annotation_figure_1))
First of all we need to compute the markers for the cell types using Seurat's funtion FindAllMarkers. For the high level annotation we will compute the markers using the function FindAllMarkers.
Seurat::Idents(sc_obj) <- sc_obj@meta.data[, clust_vr]
sc_markers <- Seurat::FindAllMarkers(
object = sc_obj,
assay = "RNA",
slot = "data",
only.pos = TRUE,
max.cells.per.ident = 500)
"{fig1}/{robj_dir}/sc_markers_fig1.rds" %>%
glue::glue() %>%
here::here() %>%
saveRDS(object = sc_markers, file = .)
Filter markers and look
sc_markers <- "{fig1}/{robj_dir}/sc_markers_fig1.rds" %>%
glue::glue() %>%
here::here() %>%
readRDS(file = .)
# sc_markers <- sc_markers %>%
# dplyr::filter(avg_log2FC >= 0.5)
DT::datatable(sc_markers, filter = "top")
Make sure all the cell types have marker genes post filtering
count_df <- dplyr::count(sc_markers, cluster)
flextable::flextable(count_df)
cluster | n |
Mono/Macro | 1,010 |
DC | 1,212 |
Granulocytes | 461 |
Mast | 737 |
Cycling | 944 |
epithelial | 1,122 |
PDC | 1,998 |
FDC | 746 |
preB/T | 400 |
NK | 816 |
ILC | 1,056 |
Naive CD8 T | 427 |
CD8 T | 566 |
DN | 1,104 |
CD4 T | 682 |
Naive CD4 T | 451 |
PC | 395 |
MBC | 563 |
NBC | 257 |
Activated NBC | 285 |
GCBC | 1,470 |
unique(sc_obj@meta.data[, clust_vr]) %in% count_df$cluster
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
Subset SC object so all the cells from the same cell type to come from the same batch
# subset most representative sample(s)
ns <- with(sc_obj@meta.data, table(gem_id, annotation_figure_1))
ns <- as.matrix(unclass(ns))
m <- 100 # Max cells per cell type
# Extract cell barcodes
meta <- sc_obj@meta.data
id <- lapply(colnames(ns), function(nm) {
x <- ns[, nm]
# Initialize variables
n <- 0 # N of cells
s <- c() # Gem IDs
b <- c() # Cell barcodes
while (n < m & length(x) > 0) {
# select gem id with the most cells
i <- which.max(x)
# Add gem id to vector s
s <- c(s, names(i))
# Add number of cells per cell type to n
n <- n + x[i]
# Remove gem id from x to move on to the next
x <- x[-i]
# extract barcode
barcode <- rownames(meta[meta[, "gem_id"] == names(i) &
meta[, "annotation_figure_1"] == nm, ])
# make sure it adds up to m
# print(barcode)
if (length(b) + length(barcode) > m) {
barcode <- sample(x = barcode, size = m - length(b), replace = FALSE)
}
b <- c(b, barcode) # Cell barcodes
# print(b)
}
return(b)
})
sc_sub <- sc_obj[, unlist(id)]
Remove empty genes
table(Matrix::rowSums(sp_obj@assays$Spatial@counts) == 0)
##
## FALSE TRUE
## 26759 87
table(Matrix::rowSums(sc_sub@assays$RNA@counts) == 0)
##
## FALSE TRUE
## 23820 13558
sc_sub <- sc_sub[Matrix::rowSums(sc_sub@assays$RNA@counts) != 0, ]
sp_obj <- sp_obj[Matrix::rowSums(sp_obj@assays$Spatial@counts) != 0, ]
Run deconvolution using the scRNAseq and the spatial transcriptomics data
decon_mtrx_ls <- SPOTlight::spotlight_deconvolution(
se_sc = sc_obj,
counts_spatial = sp_obj@assays$Spatial@counts,
clust_vr = clust_vr,
cluster_markers = sc_markers,
cl_n = 50,
hvg = 3000,
ntop = NULL,
transf = "uv",
method = "nsNMF",
min_cont = 0,
assay = "RNA",
slot = "counts")
Change names to remove . by original name
# names_df <- data.frame(
# plt_name = unique(sc_obj@meta.data[, clust_vr]),
# ct_name = stringr::str_replace_all(
# string = unique(sc_obj@meta.data[, clust_vr]),
# pattern = "[:punct:]|[:blank:]|[+]", "."))
#
#
# cnames <- colnames(decon_mtrx_ls[[2]])
# new_names <- data.frame("ct_name" = cnames) %>%
# dplyr::left_join(names_df, by = "ct_name") %>%
# dplyr::pull("plt_name")
# colnames(decon_mtrx_ls[[2]]) <- new_names
"{fig1}/{robj_dir}/decon_mtrx_{clust_vr}.rds" %>%
glue::glue() %>%
here::here() %>%
saveRDS(
object = decon_mtrx_ls,
file = .)
Before even looking at the decomposed spots we can gain insight on how well the model performed by looking at the topic profiles for the cell types.
nmf_mod <- decon_mtrx_ls[[1]]
decon_mtrx <- decon_mtrx_ls[[2]]
rownames(decon_mtrx) <- colnames(sp_obj)
The first thing we can do is look at how specific the topic profiles are for each cell type.
h <- NMF::coef(nmf_mod[[1]])
rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
topic_profile_plts <- SPOTlight::dot_plot_profiles_fun(
h = h,
train_cell_clust = nmf_mod[[2]])
topic_profile_plts[[2]] +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90),
axis.text = ggplot2::element_text(size = 12))
Next we can take a look at the how the individual topic profiles of each cell within each cell-type behave. Here we expect that all the cells from the same cell type show a similar topic profile distribution, if not there might be a bit more substructure in that cluster and we may only be capturing one or the other.
topic_profile_plts[[1]] +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90),
axis.text = ggplot2::element_text(size = 12))
Lastly we can take a look at which genes are the most important for each topic and therefore get an insight into which genes are driving them.
basis_spotlight <- data.frame(NMF::basis(nmf_mod[[1]]))
colnames(basis_spotlight) <- glue::glue("Topic {1:length(unique(nmf_mod[[2]]))}")
DT::datatable(round(basis_spotlight, 5),
filter = "top")
sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
##
## Matrix products: default
## BLAS/LAPACK: /scratch/groups/singlecell/software/anaconda3/envs/spatial_r/lib/libopenblasp-r0.3.12.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.2.0 nnls_1.4 edgeR_3.32.1 limma_3.46.0 NMF_0.23.0 Biobase_2.50.0 BiocGenerics_0.36.1 cluster_2.1.2 rngtools_1.5.2 pkgmaker_0.32.2 registry_0.5-1 tibble_3.1.6 purrr_0.3.4 SPOTlight_0.1.7 Matrix_1.4-0 readr_2.1.2 stringr_1.4.0 dplyr_1.0.8 cowplot_1.1.1 ggpubr_0.4.0 ggplot2_3.3.5 SeuratObject_4.0.4 Seurat_4.1.0
##
## loaded via a namespace (and not attached):
## [1] uuid_1.0-3 backports_1.4.1 systemfonts_1.0.4 plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2 splines_4.0.1 crosstalk_1.2.0 listenv_0.8.0 scattermore_0.8 gridBase_0.4-7 digest_0.6.29 foreach_1.5.2 htmltools_0.5.2 fansi_1.0.2 magrittr_2.0.2 doParallel_1.0.17 tensor_1.5 ROCR_1.0-11 tzdb_0.2.0 globals_0.14.0 matrixStats_0.61.0 officer_0.4.1 vroom_1.5.7 spatstat.sparse_2.1-0 colorspace_2.0-3 ggrepel_0.9.1 xfun_0.30 crayon_1.5.0 jsonlite_1.8.0 spatstat.data_2.1-2 iterators_1.0.14 survival_3.3-1 zoo_1.8-9 glue_1.6.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.9 car_3.0-12 future.apply_1.8.1 abind_1.4-5 scales_1.1.1 DBI_1.1.2 rstatix_0.7.0 spatstat.random_2.1-0 miniUI_0.1.1.1 Rcpp_1.0.8 viridisLite_0.4.0 xtable_1.8-4 reticulate_1.24 spatstat.core_2.4-0 bit_4.0.4 DT_0.21 htmlwidgets_1.5.4
## [55] httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2 ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3 sass_0.4.0 uwot_0.1.11 deldir_1.0-6 locfit_1.5-9.4 utf8_1.2.2 here_1.0.1 labeling_0.4.2 tidyselect_1.1.2 rlang_1.0.2 reshape2_1.4.4 later_1.3.0 munsell_0.5.0 tools_4.0.1 cli_3.2.0 generics_0.1.2 broom_0.7.12 ggridges_0.5.3 evaluate_0.15 fastmap_1.1.0 yaml_2.3.5 goftest_1.2-3 knitr_1.37 bit64_4.0.5 fitdistrplus_1.1-6 zip_2.2.0 RANN_2.6.1 pbapply_1.5-0 future_1.24.0 nlme_3.1-155 mime_0.12 xml2_1.3.3 compiler_4.0.1 plotly_4.10.0 png_0.1-7 ggsignif_0.6.3 spatstat.utils_2.3-0 bslib_0.3.1 stringi_1.7.6 highr_0.9 gdtools_0.2.4 lattice_0.20-45 vctrs_0.3.8 pillar_1.7.0 lifecycle_1.0.1 spatstat.geom_2.3-2 lmtest_0.9-39 jquerylib_0.1.4 RcppAnnoy_0.0.19
## [109] data.table_1.14.2 irlba_2.3.5 flextable_0.7.0 httpuv_1.6.5 patchwork_1.1.1 R6_2.5.1 promises_1.2.0.1 KernSmooth_2.23-20 gridExtra_2.3 parallelly_1.30.0 codetools_0.2-18 MASS_7.3-55 assertthat_0.2.1 rprojroot_2.0.2 withr_2.5.0 sctransform_0.3.3 mgcv_1.8-39 hms_1.1.1 grid_4.0.1 rpart_4.1.16 rmarkdown_2.12 carData_3.0-5 Rtsne_0.15 shiny_1.7.1 base64enc_0.1-3